Up and Down the Python Data and Web Visualization Stack

USGS dataset listing every wind turbine in the United States:


In [35]:
from collections import OrderedDict

import bearcart
import bokeh
import bokeh.plotting as bp
from bokeh.plotting import output_notebook
import folium
import ggplot as gg
from ggplot import ggplot
from IPython.html.widgets import interact
import matplotlib.pyplot as plt
import mpld3
import numpy as np
import pandas as pd
import vincent

%matplotlib inline
mpld3.enable_notebook()
bearcart.initialize_notebook()
vincent.initialize_notebook()
folium.initialize_notebook()

# axis_color = 'black'
axis_color = '#d0d0d0'



In [36]:
df = pd.read_csv('USGS_WindTurbine_201307_cleaned.csv')
df.head()


Out[36]:
Unnamed: 0 Unique ID Site Name Online Year Turbine Manufacturer Turbine Model Tower Type Turbine MW Total Height Tower/Hub Height Blade Length Rotor Diameter Rotor Swept Area Latitude-Decimal Degrees Longitude-Decimal Degrees State County Attribute Confidence Location Confidence WENDI Site Name
0 0 1836 Sand Point 2012 Vestas V39 monopole 0.500 59.5 40 19.5 39 1194.59 55.3436 -160.4891 AK Aleutians East 2 2 unknown ...
1 1 1837 Sand Point 2012 Vestas V39 monopole 0.500 59.5 40 19.5 39 1194.59 55.3448 -160.4906 AK Aleutians East 2 2 unknown ...
2 2 1838 St. Paul Island 2007 Vestas V27 monopole 0.225 50.5 37 13.5 27 572.55 57.1571 -170.2352 AK Aleutians West 2 2 St. Paul Island ...
3 3 1839 St. Paul Island 1999 Vestas V27 monopole 0.225 50.5 37 13.5 27 572.55 57.1576 -170.2359 AK Aleutians West 2 2 St. Paul Island ...
4 4 1840 St. Paul Island 2007 Vestas V27 monopole 0.225 50.5 37 13.5 27 572.55 57.1576 -170.2373 AK Aleutians West 2 2 St. Paul Island ...

5 rows × 27 columns


In [37]:
ws = pd.read_table('CO_WS_2011_2012.txt')
ws = ws.set_index('Date & Time Stamp')
ws.index = ws.index.to_datetime()
ws.head()


Out[37]:
WS1_50mMean WS1_50mStdev WS1_50mMax WS1_50mMin WS2_50mMean WS2_50mStDev WS2_50mMax WS2_50mMin WS3_30mMean WS3_30mStDev WS3_30mMax WS3_30mMin WS4_40mMean WS4_40mStDev WS4_40mMax WS4_40mMin WD1_49mMean WD1_49mStDev WD1_49mMax WD1_49mMin
2011-06-03 00:00:00 9.50 1.12 11.70 6.40 9.39 1.03 11.39 6.43 7.96 1.33 10.97 4.51 8.76 1.17 11.01 4.95 172 7 173 240 ...
2011-06-03 00:10:00 8.37 0.56 10.19 6.78 8.27 0.56 9.86 6.43 6.83 0.76 9.06 3.75 7.60 0.64 9.49 5.70 172 5 169 240 ...
2011-06-03 00:20:00 8.38 0.43 9.44 7.17 8.28 0.41 9.48 7.20 7.13 0.64 8.69 4.89 7.81 0.45 9.10 6.83 169 5 177 240 ...
2011-06-03 00:30:00 7.48 1.17 9.44 3.76 7.43 1.12 9.48 3.77 6.43 1.03 8.29 3.37 6.93 1.03 9.10 3.43 163 11 166 240 ...
2011-06-03 00:40:00 7.13 0.79 9.06 5.26 7.10 0.76 9.09 5.28 6.21 0.67 7.91 2.99 6.61 0.66 8.34 3.81 172 12 166 240 ...

5 rows × 24 columns


In [38]:
# Rotor Diameter vs. Turbine Manufacturer
mf_grouped = df.groupby('Turbine Manufacturer')
mean_grouped = mf_grouped.mean().dropna()
mean_rd = mean_grouped.sort('Rotor Diameter')['Rotor Diameter']
rotor_diam = vincent.Bar(mean_rd)
rotor_diam.axis_titles(x='Turbine Manufacturer', y='Rotor Diameter')
# The Hard Way
from vincent.axes import AxisProperties
from vincent.properties import PropertySet
from vincent.values import ValueRef
for axis in rotor_diam.axes:
    axis.properties = AxisProperties()
    for prop in ['ticks', 'axis', 'major_ticks', 'minor_ticks']:
        setattr(axis.properties, prop, PropertySet(stroke=ValueRef(value=axis_color)))
    axis.properties.title = PropertySet(font_size=ValueRef(value=20), 
                                        fill=ValueRef(value=axis_color))
    axis.properties.labels = PropertySet(fill=ValueRef(value=axis_color))
rotor_diam.axes[0].properties.labels.angle = ValueRef(value=50)
rotor_diam.axes[0].properties.labels.align = ValueRef(value='left')
rotor_diam.axes[0].properties.title.dy = ValueRef(value=115)
rotor_diam.scales[2].range = ['#b48ead']
rotor_diam


Out[38]:

In [39]:
# Total Turbine Count
turbine_ct = mf_grouped.count().dropna().sort('Unique ID', ascending=False)['Unique ID']
num_turbines = (vincent.Bar(turbine_ct[:25])
                       .axis_titles(x='Turbine Manufacturer', 
                                    y='Number of Turbines in the US')
                       .colors(range_=['#6a9fb5']))
# Shortcuts!
def lighten_axes(vis, x_offset=50):
    (vis.common_axis_properties(color=axis_color, title_size=20)
        .x_axis_properties(label_angle=50, label_align='left', 
                           title_offset=x_offset)
        .y_axis_properties(title_offset=-40))
# If Area Chart
# num_turbines.scales[0].type = 'ordinal'
lighten_axes(num_turbines)
num_turbines


Out[39]:

In [40]:
# Turbine Count vs. Date
grouped_date = df.groupby(['Online Year', 'Turbine Manufacturer'])
by_year = grouped_date.count()['Unique ID'].reset_index()
by_year['Online Year'] = pd.to_datetime(by_year['Online Year'], coerce=True)
by_year = by_year.rename(columns={'Unique ID': 'Turbine Count'}).dropna()
by_year = by_year.pivot(index='Online Year', columns='Turbine Manufacturer', values='Turbine Count')
by_year = by_year[turbine_ct[:10].index.tolist()]

online_by_year = (vincent.StackedArea(by_year)
                         .axis_titles(x='Date', y='Turbine Count')
                         .legend(title='Turbine Manufacturer', text_color=axis_color)
                         .colors(range_=['#ac4142', '#d28445', '#f4bf75', '#90a959', 
                                         '#75b5aa', '#6a9fb5', '#aa759f', '#8f5536']))
lighten_axes(online_by_year, x_offset=30)
online_by_year


Out[40]:

In [41]:
height_diam = (vincent.GroupedBar(mean_grouped[['Tower/Hub Height', 'Rotor Diameter']]
                                  .sort(['Rotor Diameter', 'Tower/Hub Height'], ascending=False))
                      .axis_titles(x='Turbine Manufacturer', y='Meters')
                      .legend(title='Parameters', text_color=axis_color)
                      .colors(range_=['#f4bf75', '#75b5aa']))
lighten_axes(height_diam, 100)
height_diam


Out[41]:

In [42]:
november_2011 = ws['2011-11-15':'2011-12-01']
ws_line = (vincent.Line(november_2011['WS1_50mMean'])
                  .axis_titles(x='Date', y='Wind Speed (m/s)')
                  .colors(range_=['#d28445']))
lighten_axes(ws_line, x_offset=30)
ws_line


Out[42]:

In [43]:
# Rotor Diameter vs. Power
min_heights = df[df['Rotor Diameter'] > 10]
diameter_vs_mw = (vincent.Scatter(min_heights[['Turbine MW', 'Rotor Diameter']], iter_idx='Turbine MW')
                         .axis_titles(x='Power (MW)', y='Rotor Diameter (m)')
                         .colors(range_=['#75b5aa']))
lighten_axes(diameter_vs_mw, x_offset=30)
diameter_vs_mw


Out[43]:

In [44]:
(ggplot(gg.aes(x='Turbine MW', y='Rotor Swept Area'), data=min_heights[:500])
    + gg.geom_point(color='#75b5aa', size=75)
    + gg.ggtitle("Rotor Swept Area vs. Power")
    + gg.xlab("Power (MW)")
    + gg.ylab("Rotor Swept Area (m^2)"))


Out[44]:
<ggplot: (291491889)>

In [45]:
(ggplot(gg.aes(x='WS1_50mMean'), data=ws)
 + gg.geom_histogram(fill='#ac4142') 
 + gg.ggtitle("Wind Speed Distribution") 
 + gg.labs("Wind Speed (m)", "Freq"))


binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
Out[45]:
<ggplot: (286771433)>

In [46]:
molten = pd.melt(ws, value_vars=['WS1_50mMean', 'WS2_50mMean', 'WS3_30mMean', 'WS4_40mMean'])
(ggplot(gg.aes(x='value'), data=molten)
 + gg.geom_histogram(fill='#aa759f')
 + gg.facet_wrap('variable')
 + gg.ggtitle("Wind Speed Distribution") 
 + gg.labs("Wind Speed (m)", "Freq"))


binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
Out[46]:
<ggplot: (289419253)>

In [47]:
november_2011['Date'] = november_2011.index
@interact
def plot_gg_ws(column=november_2011.columns.tolist()):
    molten = pd.melt(november_2011, 
                     value_vars=[column], 
                     id_vars=['Date'])
    gg_wind_data = (ggplot(gg.aes(x='Date', y='value'), data=molten)
                   + gg.geom_line(color="steelblue")
                   + gg.stat_smooth(colour="red"))
    gg_wind_data.draw()



In [48]:
output_notebook()
import bokeh
subset = min_heights[:1000]
bp.figure(plot_width=800, background_fill= '#e5e5e5')
plot_tools = "pan, wheel_zoom, box_zoom, reset, hover"
bp.scatter(subset['Total Height'], subset['Rotor Swept Area'], 
           tools=plot_tools, size=10, color='purple', alpha=1.0)
bp.curplot().title = "Total Height (m) vs. Rotor Swept Area (m^2)"
bp.xaxis().axis_label = "Rotor Swept Area (m^2)"
bp.yaxis().axis_label = "Following"
hover = [t for t in bp.curplot().tools if isinstance(t, bokeh.objects.HoverTool)][0]
hover.tooltips = {"(Rotor Swept Area, Total Height)": "(@x, @y)"}
bp.show()


Bokeh Plot

Configuring embedded BokehJS mode.

Bokeh Plot
Plots

In [49]:
by_year.dropna()
built_by_year = bearcart.Chart(by_year, palette='spectrum2000',
                               height=500, width=750, plt_type='bar')
built_by_year


Out[49]:

In [50]:
all_ws = bearcart.Chart(november_2011[['WS1_50mMean', 'WS2_50mMean', 'WS3_30mMean', 'WS4_40mMean']], 
                        height=500, width=750, plt_type='area')
all_ws


Out[50]:

In [51]:
all_wd = bearcart.Chart(november_2011[['WD1_49mMean', 'WD2_38mMean']], 
                        height=500, width=750, plt_type='bar',
                        colors={'WD1_49mMean': '#75b5aa', 'WD2_38mMean': '#aa759f'})
all_wd


Out[51]:

In [52]:
import seaborn as sb
sb.set(rc={'text.color': axis_color, 'axes.labelcolor': axis_color})
rvm = df[['Rotor Diameter', 'Turbine MW', 'Turbine Manufacturer']].dropna()
row_mask = rvm['Turbine Manufacturer'].isin(['Vestas', 'Siemens', 'GE'])
rvm = rvm[row_mask]
rvm['Rotor Diameter'] = rvm['Rotor Diameter'].apply(lambda x: int(5 * round(float(x)/5)))
grouped_rvm = rvm.groupby(['Rotor Diameter', 'Turbine Manufacturer'])
rvm = grouped_rvm.mean().reset_index()
sb.lmplot("Rotor Diameter", "Turbine MW", col="Turbine Manufacturer", 
          hue="Turbine Manufacturer", data=rvm)


Out[52]:
<seaborn.axisgrid.FacetGrid at 0x11151c410>

In [53]:
sb.set_palette('husl')
nov_cols = november_2011.columns.tolist()
@interact
def sb_jointplot(x=nov_cols, y=nov_cols):
    sb.jointplot(x, y, data=november_2011, size=9)



In [54]:
sb.jointplot('WD1_49mMean', 'WD2_38mMean', data=november_2011, size=9, kind='kde')


Out[54]:
<seaborn.axisgrid.JointGrid at 0x115fcffd0>

In [55]:
import scipy.stats as stats
dists = ['Rayleigh', 'Weibull', 'Normal']
@interact
def plot_sb_dist(column=ws.columns.tolist(), dist=dists):
    plt.figure(figsize=(10, 8))
    dist_map = {
        'Rayleigh': stats.rayleigh,
        'Weibull': stats.exponweib,
        'Normal': stats.norm,
    }
    sb.distplot(ws[column], fit=dist_map[dist])



In [56]:
state_geo = r'us-states.json'
state_data = df.groupby('State').count().drop('State', axis=1).reset_index()
state_map = folium.Map(location=[48, -102], zoom_start=3)
state_map.geo_json(geo_path=state_geo, data=state_data,
                   columns=['State', 'Unique ID'],
                   key_on='feature.id',
                   fill_color='BuPu', fill_opacity=0.7, line_opacity=0.2,
                   legend_name='Turbine Count Per State')
state_map.create_map('turbines_per_state.html')
state_map


Out[56]:

In [57]:
Solano = df[df['Site Name'].isin(['Solano Wind 1', 'Solano Wind 3'])]
solano_map = folium.Map(location=[38.1067, -121.7735], zoom_start=12, 
                        tiles='Stamen Terrain')
solano_map.lat_lng_popover()
for row in Solano.iterrows():
    wtg_id = str(row[1]['Unique ID'])
    lat = row[1]['Latitude-Decimal Degrees']
    lng = row[1]['Longitude-Decimal Degrees']
    solano_map.polygon_marker(location=[lat, lng], fill_color='#f4bf75', 
                              radius=12, popup='WTG ID: ' + wtg_id)
solano_map.create_map('solano_map.html')
solano_map.render_iframe = True
solano_map


Out[57]:

In [58]:
# Let's do a little more mapping
# Source: http://wind.nrel.gov/Web_nrel/
wind_map = folium.Map(location=[45.48, -121.7], zoom_start=10,
                      tiles='Stamen Terrain')
locations = {
    'df_26512': (45.38, -121.52),
    'df_26795': (45.48, -121.72),
    'df_26798': (45.48, -121.62)
}
data = {
    'df_26512': pd.read_csv('26512.csv'),
    'df_26795': pd.read_csv('26795.csv'),
    'df_26798': pd.read_csv('26798.csv')
}
charts = {
    'df_26512': None, 
    'df_26795': None, 
    'df_26798': None
}
for k, df in data.items():
    df['Date'] = pd.to_datetime(df['Date(YYYY-MM-DD hh:mm:ss)'])
    data[k] = df.drop('Date(YYYY-MM-DD hh:mm:ss)', axis=1).set_index('Date')
    data[k] = data[k]['2006-12-01': '2006-12-31']
for k, chart in charts.items():
    charts[k] = (vincent.Line(data[k]['100m wind speed (m/s)'],
                              width=400, height=200)
                        .colors(range_=['#6a9fb5']))
    charts[k].axes[0].ticks = 4
    charts[k].axis_titles(x='Date', y='100m wind speed (m/s)')
    path = ''.join([k, '.json'])
    charts[k].to_json(path)
    wind_map.polygon_marker(location=locations[k], fill_color='#6a9fb5',
                            radius=12, popup=(charts[k], path))
wind_map.create_map('wind_map.html')
wind_map.render_iframe = True
wind_map


Out[58]:

In [59]:
from IPython.core.display import HTML
styles = open("custom_dark.css", "r").read()
HTML(styles)


Out[59]: